***************
* This script generates the human capital index based on sector-specific returns
* Author: Daniel Kopp
***************

	* Start from an empty session and allow for a wide dataset.
	clear
	clear matrix
	clear mata
	set more off	
	set maxvar 8000
	
	* Collect the names of the interaction variables from the header rows of the
	* helper workbooks. This is done before the main dataset is loaded, so nothing
	* has to be preserved or restored around the imports (which keeps it fast).
	*
	* The names are copied by macro substitution instead of "global x = r(varlist)":
	* the "=" form evaluates a string expression, which can truncate long variable
	* lists in older Stata releases.
	import excel "Help_files\interactvar.xlsx", clear firstrow
	quietly ds
	global interactvar "`r(varlist)'"
	
	import excel "Help_files\interactgender.xlsx", clear firstrow
	quietly ds
	global interactgender "`r(varlist)'"
	
	import excel "Help_files\interactethnicity.xlsx", clear firstrow
	quietly ds
	global interactethnicity "`r(varlist)'"
	
	* Leave memory empty again, exactly as after the initial clear.
	clear

	use "data_processed\with_interactions_compr_all_full.dta", clear
	
	* The interaction terms are not needed for this analysis:
	drop $interactvar $interactgender $interactethnicity

	* Relative rank (rank divided by n_results) and its square and cube:
	gen rel_rank = rank/n_results
	gen rel_rank_sq = rel_rank^2
	gen rel_rank_3  = rel_rank^3
	
	* Categorical version of the relative rank in ten bins of width 0.1.
	* The bounds carry a small tolerance so that values stored as float, such as
	* 0.1, fall into the lower bin. Values outside every bin stay missing and
	* are reported by the count below.
	gen 	rel_rank_cat = 1 if inrange(rel_rank,0.0001,0.100001)
	replace rel_rank_cat = 2 if inrange(rel_rank,0.1001,0.200001)
	replace rel_rank_cat = 3 if inrange(rel_rank,0.2001,0.300001)
	replace rel_rank_cat = 4 if inrange(rel_rank,0.3001,0.400001)
	replace rel_rank_cat = 5 if inrange(rel_rank,0.4001,0.500001)
	replace rel_rank_cat = 6 if inrange(rel_rank,0.5001,0.600001)
	replace rel_rank_cat = 7 if inrange(rel_rank,0.6001,0.700001)
	replace rel_rank_cat = 8 if inrange(rel_rank,0.7001,0.800001)
	replace rel_rank_cat = 9 if inrange(rel_rank,0.8001,0.900001)
	* The upper bound used to read 100001 (missing decimal point), which put any
	* relative rank above 1 into the top bin instead of flagging it as unclassified.
	replace rel_rank_cat = 10 if inrange(rel_rank,0.9001,1.000001)
	
	* Diagnostic: number of observations without a category
	count if rel_rank_cat==.
	
	* One 0/1 dummy per observed category (0 when the category is missing)
	flevelsof rel_rank_cat, local(levels)
	foreach i of local levels {
		gen 	byte rel_rank_cat`i' = rel_rank_cat==`i'
	}	
	
	* One dummy per observed level of education_isced. The dummy is missing
	* where education_isced is missing and is labelled with the text of the
	* corresponding value label.
	local isced_lbl : value label education_isced
	levelsof education_isced, local(isced_levels)
	foreach lvl of local isced_levels {
		gen byte education_isced_`lvl' = education_isced==`lvl' if education_isced!=.
		local lvl_text : label `isced_lbl' `lvl'
		label var education_isced_`lvl' " `lvl_text' "
	}
			
	* One 0/1 dummy per observed value of rank. The names of the dummies are
	* collected in the global rank and displayed.
	local rank_vars ""
	flevelsof rank, local(rank_levels)
	foreach r of local rank_levels {
		gen byte rank`r' = rank==`r'
		local rank_vars `rank_vars' rank`r'
	}
	global rank "`rank_vars'"
	di "$rank"

	****************************
	* Define Globals
	****************************
		
	* Named lists of variables, stored as globals so that they can be reused as
	* groups. Within this script only skills_dummy and lang_dummy_full are
	* referenced again (as part of the human_capital list).
	* NOTE(review): the other lists are not used in the code visible here; check
	* whether the helper do-files rely on them before removing any.
	
	* Language dummies
	global lang_dummy_full 	"l_German l_CH_German l_English l_French  l_Italian l_northwest	l_southeuro l_centreast l_Balkan l_middleeast l_Asia l_Other l_missing"
	* Skill dummies
	global skills_dummy 		"has_skills sk_det_lang0 sk_det_lang1 sk_det_lang2 sk_det_lang3 sk_det_lang4   sk_exp  sk_education   sk_educ_tert  sk_educ_lehre  sk_educ_weiter sk_softskills sk_it_gen sk_it_deep sk_machines sk_leadership sk_language sk_lgerman sk_lfrench sk_litalian sk_lenglish sk_lchgerman sk_exp_dur_c0 sk_exp_dur_c1 sk_exp_dur_c2 sk_exp_dur_c3 sk_exp_dur_c4 " 	
	
	* Other dummy variables. This list leaves out geschlecht and the work volume dummies workvolume_c1 workvolume_c2 workvolume_c3 workvolume_c_m
	global other_dummy "erfa_s0 erfa_s1 erfa_s2 erfa_s3 erfa_s_m mobility_cat0 mobility_cat1 mobility_cat2 mobility_cat3 mobility_cat4 mobility_cat_m 	limcontract0 limcontract1 limcontract99 verfuegbar0 verfuegbar1 verfuegbar99 no_searchvar no_diff_prof_m 	zuletzt_s abschluss_s0 abschluss_s1 abschluss_s2 abschluss_s3 abschluss_s_m gesch_kontakt Sonn_Feiertag Schichtarbeit Nachtarbeit Heimarbeit Lehre fuehrer_kat0 fuehrer_kat1 fuehrer_kat2 fuehrer_kat3 fuehrer_kat4 fuehrer_kat5 fuehrer_kat6 fuehrer_kat_m education_isced_1 education_isced_2 education_isced_3 education_isced_6 education_isced_7 education_isced_8 education_isced_999 prof_length_1 prof_length_2 prof_length_3 prof_length_4 prof_length_99 user_logged_in"
	* Canton dummies (kanton1 to kanton26)
	global kanton "kanton1 kanton2 kanton3 kanton4 kanton5 kanton6 kanton7 kanton8 kanton9 kanton10 kanton11 kanton12 kanton13 kanton14 kanton15 kanton16 kanton17 kanton18 kanton19 kanton20 kanton21 kanton22 kanton23 kanton24 kanton25 kanton26"
	* Language region dummies
	global lang_region "lang_region1 lang_region2 lang_region3 lang_region4"
	* Continuous variables; the list named squared holds the matching _2 variables
	global other_contin " sk_est_n_word no_diff_prof prof_exp_tot"
	* Occupation dummies (bn2000_1_s1 to bn2000_1_s9)
	global occup_dummy "bn2000_1_s1 bn2000_1_s2 bn2000_1_s3 bn2000_1_s4 bn2000_1_s5 bn2000_1_s6 bn2000_1_s7 bn2000_1_s8 bn2000_1_s9"
	global squared "sk_est_n_word_2 no_diff_prof_2 prof_exp_tot_2"
	* Nationality and name category dummies
	global nation_name_kat "nation_kat0 nation_kat1 nation_kat2 nation_kat3 nation_kat4 nation_kat5 nation_kat6 nation_kat8 nation_kat9 nation_kat_m name_kat0 name_kat1 name_kat2 name_kat3 name_kat4 name_kat5 name_kat6 name_kat7 name_kat8 name_kat_m"
	* Names of the ten relative rank category dummies
	global rel_rank_cat "rel_rank_cat1 rel_rank_cat2 rel_rank_cat3 rel_rank_cat4 rel_rank_cat5 rel_rank_cat6 rel_rank_cat7 rel_rank_cat8 rel_rank_cat9 rel_rank_cat10"
	
	
	****************************
	* Sample restriction
	****************************

	* Apply the sample restrictions defined in the helper do-file
	run "Help_files\sample_restrictions_parttime.do"
	
	* Discard every search in which the candidate pool was restricted to either
	* full- or part-time workers, i.e. retain only observations with s_workload
	* equal to system missing
	drop if s_workload!=.
		
	****************************
	* Gen new var and dummy vars
	****************************	
	
	* Three variables that are zero everywhere; their names go into base_cat
	forvalues b = 1/3 {
		gen base`b' = 0
	}
	global base_cat "base1 base2 base3"
	
	* Variables that enter the human capital index: education, experience and
	* leadership dummies, the skill dummies, the word count terms and the
	* language dummies
	global human_capital "abschluss_s0 abschluss_s1 abschluss_s2 abschluss_s3 erfa_s0 erfa_s1 erfa_s2 erfa_s3 fuehrer_kat0 fuehrer_kat1 fuehrer_kat2 fuehrer_kat3 fuehrer_kat4 fuehrer_kat5 fuehrer_kat6 education_isced_999 education_isced_1 education_isced_2 education_isced_3 education_isced_6 education_isced_7 education_isced_8 $skills_dummy sk_est_n_word sk_est_n_word_2 $lang_dummy_full"	  
	display "$human_capital"
	
	* Gen isco 1-digit
	* Two-digit codes 10, 20, 30, 50, 70 and 90 are first set to 99, which lies
	* outside all ranges used below
	recode s_isco_08_2 (10 20 30 50 70 90 = 99)
	
	* Codes 1-3 map to group 0. For groups 1 to 9 the two-digit range starts at
	* <group>1 and ends at the value listed in the loop (11-14, 21-26, ..., 91-96).
	* Everything else stays missing for now.
	gen s_isco_08_1 = 0 if inrange(s_isco_08_2,1,3)
	local major = 0
	foreach last in 14 26 35 44 54 64 75 83 96 {
		local ++major
		replace s_isco_08_1 = `major' if inrange(s_isco_08_2,`major'1,`last')
	}
	
	* Attach the value label defined in the helper do-file
	capture label drop isco08_1_lab
	run "Help_files\label_isco08_1.do"
	label values s_isco_08_1 isco08_1_lab
	
	* Missing values are recoded to zero, otherwise we lose too many observations
	recode s_isco_08_1 (.=0)

	
	*************************************************
	* Construct Human capital index for full- and part-time
	*************************************************

	
	* Split sample randomly in two parts:
	* one uniform draw is made for the first observation of every search_tag and
	* then spread to all observations of that search_tag, so that a whole search
	* ends up in the same part. The seed makes the draws reproducible.
	set seed 3500 
	bysort search_tag: gen n = _n 			if search_tag!=. 
	gen u_ = runiform() 					if n==1 & search_tag!=. 
	bysort search_tag: egen u = mean(u_)	if search_tag!=.  
	* u is missing whenever search_tag is missing. Stata treats missing as larger
	* than any number, so an unconditional "u>0.50" would silently assign those
	* observations to the training sample. They are left unassigned (missing)
	* instead and show up in the tabulation below.
	gen training_sample = u>0.50 			if !missing(u)
	tab training_sample,m
	drop n u_ u

	* One 0/1 dummy per observed 1-digit ISCO group, labelled with the text of the
	* value label attached to s_isco_08_1. The dummy names are collected in the
	* global isco_dummy_1 and displayed.
	local isco_lbl : value label s_isco_08_1
	levelsof s_isco_08_1, local(isco_levels)
	local isco_names
	foreach g of local isco_levels {
		gen byte isco_1_s`g' = s_isco_08_1==`g'
		local g_text : label `isco_lbl' `g'
		label var isco_1_s`g' " `g_text' "
		local isco_names `isco_names' isco_1_s`g'
	}
	global isco_dummy_1 " `isco_names' "
	di "$isco_dummy_1"

	* Interact every human capital variable with every ISCO dummy. The product
	* of dummy D and variable V is stored as DXV. The outer loop runs over the
	* human capital variables, the inner loop over the ISCO dummies. All new
	* names are collected in the global isco_humancap_var and displayed.
	local hc_by_isco
	foreach hc of global human_capital {
		foreach sector of global isco_dummy_1 {
			gen `sector'X`hc' = `sector'*`hc'
			local hc_by_isco `hc_by_isco' `sector'X`hc'
		}
	}
	global isco_humancap_var " `hc_by_isco' "
	di "$isco_humancap_var"
	
* Estimate the sector-specific (isco 1) returns to the human capital variables.
* The regression uses only the training sample, only observations with
* click_candidate==1 (profile viewed; most characteristics are only visible on
* the profile) and only workvolume_c1==1 (full-time workers according to the
* original author's note). The linear prediction is then computed with predict
* without a sample restriction.
	quietly reghdfe contact_button_clicked $human_capital $isco_humancap_var ///
		if click_candidate==1 & training_sample==1 & workvolume_c1==1, ///
		absorb(rank kanton geschlecht nation_kat name_origin_1_kat mobility_cat_m limited_contract verfuegbar zuletzt_s gesch_kontakt Sonn_Feiertag Schichtarbeit Nachtarbeit Heimarbeit Lehre profile_length_cat user_logged_in profile_seen_before_cat, savefe) ///
		cluster(tracking_id)
	predict xb_human_index_isco , xb
	summarize xb_human_index_isco
	
* Human capital index in the test sample: average prediction per stes_id,
* computed over the observations with training_sample==0 only
	bysort stes_id: egen mean_xb_hc_index_isco = mean(xb_human_index_isco) if training_sample==0
	summarize mean_xb_hc_index_isco, detail
	
* Save the dataset with human capital index based on sector-specific returns.
* The data in memory are preserved and restored, so only the saved file changes.
	preserve
	
	* Keep only the search_tag/stes_id pairs that occur exactly once.
	* NOTE(review): this removes every copy of a duplicated pair, not just the
	* surplus copies - confirm that this is intended.
	duplicates tag search_tag stes_id , gen(duplicates)
	tab duplicates, m
	keep if duplicates==0
	drop duplicates
	
	rename training_sample training_sample_human
	
	* One row per stes_id holding the means of both variables
	collapse (mean) training_sample_human mean_xb_hc_index_isco , by(stes_id)
	save "Misc_files\key_human_parttime_isco_new_stes.dta", replace
	restore
	
	